Missing Values Visualizer Examples


In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all modules before each
# cell executes, so edits to imported source files take effect without a
# kernel restart.
# NOTE: keep comments on their own lines; line magics consume the rest of the line.
%load_ext autoreload

%autoreload 2

import sys
# Append the directory two levels above the working directory to the import
# path (presumably the repository root, so the in-tree package is importable;
# confirm). Because this appends, an already-installed copy found earlier on
# sys.path would still take precedence.
sys.path.append("./../..")

In [2]:
%reload_ext yellowbrick
%matplotlib inline
# Imports
import pandas as pd  
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.preprocessing import OneHotEncoder
from yellowbrick.contrib.missing import MissingValuesDispersion, MissingValuesBar
from sklearn.datasets import make_classification

Use the Horse Colic Data Set

The data set contains naturally occurring missing values.


In [3]:
# Attribute metadata kept next to this notebook; its "Attribute" column is used
# later to name the columns of the data frame.
headers = pd.read_csv(filepath_or_buffer="./horse-colic.attrs")

In [4]:
# The UCI "horse-colic.data" file is whitespace-delimited, has no header row,
# and marks missing entries with "?".
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/horse-colic/horse-colic.data'

# Retrieve Data Set
# header=None keeps the first record as data; the default (header=0) would
# consume it as column names, and it would then be lost when the names are
# overwritten below. sep=r"\s+" replaces the deprecated delim_whitespace=True.
df = pd.read_csv(url, sep=r"\s+", header=None)

# Name the columns from the "Attribute" column of `headers` (loaded in an
# earlier cell).
df.columns = headers.Attribute.values

# Turn the "?" placeholders into NaN so they register as missing values.
df = df.replace(to_replace="?", value=np.nan)

In [5]:
# Split the frame: the 'cp_data' column is the target, everything else is a feature.
y = df["cp_data"]
X = df.drop(columns=["cp_data"])

Using Missing Values Dispersion Chart


In [6]:
# Class labels handed to every visualizer below.
classes = ["sick", "healthy"]

No target y passed in, produces mono-color chart


In [7]:
# Dispersion chart fitted on the features only (no target supplied).
viz = MissingValuesDispersion(classes=classes)
viz.fit(X, y=None)
viz.show()


No handles with labels found to put in legend.

Target y passed in, produces Dispersion chart with elements colored by target variable


In [8]:
# Dispersion chart fitted with the target, so points are colored by class.
viz = MissingValuesDispersion(classes=classes)
viz.fit(X, y)
viz.show()


Using Missing Values Bar Chart

No target y passed in, produces mono-color bar chart


In [9]:
# Bar chart of missing-value counts, fitted on the features only (no target supplied).
oz = MissingValuesBar(classes=classes)
oz.fit(X, y=None)
oz.show()


No handles with labels found to put in legend.

Target y passed in, produces stacked bar chart


In [10]:
# Bar chart fitted with the target, so the bars are stacked by class.
oz = MissingValuesBar(classes=classes)
oz.fit(X, y)
oz.show()



In [ ]: